# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(plm)
library(lfe)
library(dplyr)
library(sandwich)

rm(list = ls())

# merge stock returns with predicted rating changes (based on Dec 2001 data)
data = readRDS('input_data/predicted_stock_rating_change_dec_2001.RDS')
tmp = readRDS('input_data/monthly_stock_return.RDS')[, list(yyyymm, permno, primexch, ret, me_1)]
tmp = tmp[yyyymm %in% 200201:200212]
data = merge(data, tmp, by = c('permno')); rm(tmp)

# get stocks' size and value characteristics
tmp = readRDS('input_data/stock_style_characteristics.RDS')
data = merge(data, tmp, by = c('yyyymm','permno')); rm(tmp)
data_bk = copy(data)

# sort stocks by within nBins x nBins style predicted rating changes. output idiosyncratic returns
p.getOne = function(nBins){
  data = copy(data_bk)
  
  # get style bins
  data[, c('bin_mve','bin_bm') := 1]
  for (i in 1:(nBins-1)){
    data = merge(data, data[primexch == 'N', list(cut_mve = quantile(mve, i/nBins), 
                                                  cut_bm = quantile(bm, i/nBins)), yyyymm], by = 'yyyymm')
    data[mve > cut_mve, bin_mve := i+1]
    data[bm > cut_bm,  bin_bm := i+1]
    data[, c('cut_mve','cut_bm') := NULL]
  }

  # remove style-level ratings
  data = merge(data, data[, list(rating_change_prediction_s = weighted.mean(rating_change_prediction, me_1)), list(yyyymm, bin_mve, bin_bm)], by = c('yyyymm','bin_mve','bin_bm'))
  data[, rating_change_prediction_i := rating_change_prediction - rating_change_prediction_s]
  data[, rating_change_prediction := NULL]
  
  # remove style-level returns
  data = merge(data, data[, list(ret_s = weighted.mean(ret, me_1)), list(yyyymm, bin_mve, bin_bm)], by = c('yyyymm','bin_mve','bin_bm'))
  data[, ret := ret - ret_s]
  data[, ret_s := NULL]
  
  # Sort within style into quintiles by idiosyncratic predicted rating changes
  tmp = data[yyyymm == min(yyyymm)]
  tmp[, bin := 1]
  for (i in 1:4){
    tmp = merge(tmp, tmp[, list(cut = quantile(rating_change_prediction_i, i/5)), list(bin_mve, bin_bm)], by = c('bin_mve','bin_bm'), all.x = T)
    tmp[rating_change_prediction_i > cut, bin := bin + 1]
    tmp[, cut := NULL]
  }
  tmp = tmp[, list(permno, bin)]
  data = merge(data, tmp, by = 'permno'); rm(tmp)
  
  # summarize returns by bin and output
  data = data[, list(ret = weighted.mean(ret, me_1)), list(yyyymm, bin)]
  data = data[order(yyyymm)]
  data[, numStyleBins := nBins]
  return(data)
}
data = Reduce(rbind, lapply(c(3,5), p.getOne)); rm(data_bk)

# compute before vs after 2002 changes
data[, post := ifelse(yyyymm > 200206, 1, 0)]
data = merge(data[post == 1, list(ret_post2002 = mean(ret)), list(numStyleBins, bin)],
             data[post == 0, list(ret_pre2002 = mean(ret)), list(numStyleBins, bin)])
data = data[, list(bin, numStyleBins, ret_change = ret_post2002 - ret_pre2002)]

# Demean to focus on cross-sectional dispersion
data[numStyleBins == 3, ret_change := ret_change - mean(ret_change)]
data[numStyleBins == 5, ret_change := ret_change - mean(ret_change)]

data = data[order(numStyleBins, -bin)]
data[, bin_lab := rep(c('Positive change', '2', '3', '4', 'Negative change'),2)]
data[, lab := ifelse(numStyleBins == 3, 'Controlling for 3x3 styles', 'Controlling for 5x5 styles')]

# plot
ggplot(data, aes(x = reorder(bin_lab, -bin), y = ret_change, fill = lab)) + geom_bar(stat = 'identity', position = 'dodge') + 
  coord_cartesian(ylim = c(-.03, .03)) + theme_classic() + 
  theme(legend.title = element_blank(), legend.position = c(.8, .8)) + labs(x = NULL, y = 'Change of monthly idiosyncratic return') + 
  scale_y_continuous(labels = scales::percent_format(accuracy = 1))

